Supervised clustering
Check the heatmap of a subset of the data to see if replicates look
similar to each other. This type of heatmap cluster is supervised, since
we are picking the 20 genes with the biggest changes.
It looks like 3 of the MSDINs are included in this list of top 20
genes with biggest changes, including the putative leaderless!
# find 15 genes with highest expression levels from dds
select <- order(rowMeans(counts(dds,normalized=TRUE)),
decreasing=TRUE)[1:15]
# make new data frame with only Population stage info
df <- as.data.frame(colData(dds)[,c("Population")])
colnames(df) <- c("Population")
#samples <- colnames(assay(ntd))
samples <- metadata$Sample_ID
rownames(df) <- samples
# heatmap of variance stabilized data
library(pheatmap)
# extract data
mat <- assay(vsd)[select,]
top15 <- row.names(mat)
colnames(mat) <- metadata$Sample_ID
# remove the random prefix and suffixes
row.names(mat) <- gsub("^[^-]+-|\\.m01$", "", row.names(mat))
row.names(mat)
## [1] "Ap.00g078360" "Ap.00g054880" "Ap.00g019870" "Ap.00g054640" "Ap.00g057760"
## [6] "MLGFLVLP" "Ap.00g068810" "Ap.00g039900" "SFFFPIP" "Ap.00g008070"
## [11] "GPVFFA" "Ap.00g078370" "Ap.00g011790" "Ap.00g001030" "Ap.00g063180"
pheatmap(mat, cluster_rows=FALSE,
show_rownames=TRUE,
cluster_cols=TRUE, annotation_col=df)

# also make heatmap of genes with custom colors
genotype_colors <- c("#723D00","#fb8500","#ffb703",
"#023047","#0C556E","#177995","#219EBC","#5CB7CE","#96CFE0","#D1E8F2")
annotation_colors <- list(Population = setNames(genotype_colors, unique(df$Population)))
# Define your custom color scale (from dark blue to aqua)
my_colors <- colorRampPalette(c("grey90","#03045e"))(100)
pheatmap(mat, cluster_rows=FALSE,
show_rownames=TRUE,
cluster_cols=TRUE,
annotation_col=df,
annotation_colors = annotation_colors,
legend=TRUE,
color=my_colors,
border_color = "transparent",
fontsize_row = 10, # Row label font size
fontsize_col = 10)
##Do we know any functional information about the highly expressed
transcripts?
I used the following code to extract all the protein sequences that
match the highly expressed genes, and then I used the online NCBI blastp
to search human and Aspergillus fumigatus for closely related proteins
with known functions or names.
Next let’s figure out which genes are highly expressed…
library(rtracklayer)
library(Biostrings)
## Loading required package: XVector
##
## Attaching package: 'Biostrings'
## The following object is masked from 'package:base':
##
## strsplit
library(GenomicFeatures)
## Loading required package: AnnotationDbi
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
# Define file paths
genome_file <- "/Volumes/Liv/Lab_projects/Amanita_phalloides/aphalloides_reff/reff/10511_Aphal_PT_AllpathsLG_LINKS_jelly_pilon.fna"
# make sure chromosomes are assigned correctly
library(Rsamtools)
# Load Genome Sequence
genome_fa <- FaFile(genome_file)
class(genome)
## [1] "standardGeneric"
## attr(,"package")
## [1] "methods"
names(genome)
## NULL
# Load GFF3
gff <- paste0(dir,"/aphal_reff_with_msdins.gff3")
gff_sorted <- read_tsv(gff, comment = "#", col_names = FALSE,show_col_types = FALSE)
colnames(gff_sorted) <- c("seqid", "source", "type", "start", "end", "score", "strand", "phase", "attributes")
sum(is.na(gff_sorted$start)) # Count missing start positions
## [1] 0
sum(is.na(gff_sorted$end))
## [1] 0
# Create TxDb from GFF3
options(scipen = 999)
txdb <- makeTxDbFromGFF(gff, format="gff3")
## Warning in call_fun_in_txdbmaker("makeTxDbFromGFF", ...): makeTxDbFromGFF() has moved from GenomicFeatures to the txdbmaker package,
## and is formally deprecated in GenomicFeatures >= 1.59.1. Please call
## txdbmaker::makeTxDbFromGFF() to get rid of this warning.
## Import genomic features from the file as a GRanges object ...
## OK
## Prepare the 'metadata' data frame ... OK
## Make the TxDb object ...
## Warning in .makeTxDb_normarg_chrominfo(chrominfo): genome version information
## is not available for this TxDb object
## OK
# save txdb object
saveDb(txdb, file = paste0(dir,"/txdb.sqlite"))
## TxDb object:
## # Db type: TxDb
## # Supporting package: GenomicFeatures
## # Data source: /Volumes/Liv/Lab_projects/Amanita_phalloides/bin/Scripts_for_publication/aphal_reff_with_msdins.gff3
## # Organism: NA
## # Taxonomy ID: NA
## # miRBase build ID: NA
## # Genome: NA
## # Nb of transcripts: 8812
## # Db created by: txdbmaker package from Bioconductor
## # Creation time: 2025-08-08 10:55:51 -0500 (Fri, 08 Aug 2025)
## # txdbmaker version at creation time: 1.4.2
## # RSQLite version at creation time: 2.4.2
## # DBSCHEMAVERSION: 1.2
# Extract CDS sequences from genome
transcripts <- exonsBy(txdb, by="tx", use.names=TRUE)
cds_seqs <- extractTranscriptSeqs(genome_fa, transcripts)
# find cds that might not contain complete codons (length 3)
invalid_cds <- names(cds_seqs)[width(cds_seqs) %% 3 != 0]
invalid_cds
## [1] "26557-Ap.00g000010.m01" "26557-Ap.00g002410.m01" "26557-Ap.00g069340.m01"
## [4] "26557-Ap.00g069850.m01" "26557-Ap.00g074640.m01" "26557-Ap.00g074690.m01"
## [7] "26557-Ap.00g028120.m01" "26557-Ap.00g077690.m01" "26557-Ap.00g078010.m01"
## [10] "AWLATCP_2" "AWLATCP_2.m01" "FNLFRFPYP"
## [13] "FNLFRFPYP.m01" "26557-Ap.00g078580.m01" "26557-Ap.00g078590.m01"
## [16] "LIQRPFAP_1" "LIQRPFAP_1.m01" "26557-Ap.00g028540.m01"
## [19] "26557-Ap.00g078640.m01" "26557-Ap.00g079210.m01" "26557-Ap.00g079670.m01"
## [22] "26557-Ap.00g080450.m01" "26557-Ap.00g081210.m01" "SFFFPIP"
## [25] "SFFFPIP.m01" "26557-Ap.00g031270.m01" "LFFWFWFLWP"
## [28] "LFFWFWFLWP.m01" "26557-Ap.00g030190.m01" "26557-Ap.00g082400.m01"
## [31] "26557-Ap.00g082490.m01" "26557-Ap.00g082500.m01" "26557-Ap.00g082510.m01"
## [34] "26557-Ap.00g082650.m01" "FFFPPFFIPP" "FFFPPFFIPP.m01"
## [37] "MYNPPYFLPP" "MYNPPYFLPP.m01" "VQKPWSRP"
## [40] "VQKPWSRP.m01" "LGRPESLP" "LGRPESLP.m01"
## [43] "IRLPPLFLPP" "IRLPPLFLPP.m01" "LRLPPFMIPP"
## [46] "LRLPPFMIPP.m01" "FIFPPFFIPP" "FIFPPFFIPP.m01"
## [49] "26557-Ap.00g083540.m01" "26557-Ap.00g083580.m01" "26557-Ap.00g083810.m01"
## [52] "26557-Ap.00g083820.m01" "26557-Ap.00g032470.m01" "26557-Ap.00g033090.m01"
## [55] "26557-Ap.00g083830.m01" "26557-Ap.00g083850.m01" "26557-Ap.00g033600.m01"
## [58] "26557-Ap.00g084160.m01" "MLPGMVAFS" "MLPGMVAFS.m01"
## [61] "26557-Ap.00g084380.m01" "26557-Ap.00g084500.m01" "26557-Ap.00g003000.m01"
## [64] "26557-Ap.00g005340.m01" "AWLVDCP" "AWLVDCP.m01"
## [67] "26557-Ap.00g033680.m01" "26557-Ap.00g084700.m01" "26557-Ap.00g084970.m01"
## [70] "26557-Ap.00g034010.m01" "26557-Ap.00g085350.m01" "26557-Ap.00g085650.m01"
## [73] "26557-Ap.00g085660.m01" "26557-Ap.00g085760.m01" "LILLAALGIP"
## [76] "LILLAALGIP.m01" "FNILPFMLPP" "FNILPFMLPP.m01"
## [79] "GVILIIP" "GVILIIP.m01" "LPILPIPPLP"
## [82] "LPILPIPPLP.m01" "AWLATCP_1" "AWLATCP_1.m01"
## [85] "IIGILLPP" "IIGILLPP.m01" "26557-Ap.00g085780.m01"
## [88] "26557-Ap.00g085810.m01" "FFPIVFSPP" "FFPIVFSPP.m01"
## [91] "26557-Ap.00g035390.m01" "GPVFFA" "GPVFFA.m01"
## [94] "26557-Ap.00g036150.m01" "26557-Ap.00g038290.m01" "26557-Ap.00g086220.m01"
## [97] "MLGFLVLP" "MLGFLVLP.m01" "26557-Ap.00g038790.m01"
## [100] "26557-Ap.00g086250.m01" "26557-Ap.00g006790.m01" "26557-Ap.00g038890.m01"
## [103] "26557-Ap.00g038900.m01" "26557-Ap.00g086340.m01" "26557-Ap.00g086350.m01"
## [106] "26557-Ap.00g039780.m01" "26557-Ap.00g086430.m01" "26557-Ap.00g041060.m01"
## [109] "26557-Ap.00g041080.m01" "ISDPTAYP" "ISDPTAYP.m01"
## [112] "26557-Ap.00g041800.m01" "26557-Ap.00g086830.m01" "26557-Ap.00g086840.m01"
## [115] "26557-Ap.00g087100.m01" "26557-Ap.00g087150.m01" "26557-Ap.00g008720.m01"
## [118] "26557-Ap.00g013880.m01" "26557-Ap.00g049740.m01" "26557-Ap.00g087170.m01"
## [121] "IWGIGCNP" "IWGIGCNP.m01" "IWGIGCDP_1"
## [124] "IWGIGCDP_1.m01" "IWGIGCDP_2" "IWGIGCDP_2.m01"
## [127] "26557-Ap.00g050370.m01" "26557-Ap.00g049970.m01" "26557-Ap.00g050380.m01"
## [130] "26557-Ap.00g050390.m01" "26557-Ap.00g050400.m01" "26557-Ap.00g051340.m01"
## [133] "FMPLAP" "FMPLAP.m01" "IFLAFPIPP"
## [136] "IFLAFPIPP.m01" "HFASFIPP" "HFASFIPP.m01"
## [139] "TIYYLYFIP" "TIYYLYFIP.m01" "26557-Ap.00g087420.m01"
## [142] "26557-Ap.00g013920.m01" "26557-Ap.00g014520.m01" "26557-Ap.00g053140.m01"
## [145] "26557-Ap.00g055300.m01" "26557-Ap.00g014670.m01" "26557-Ap.00g014680.m01"
## [148] "26557-Ap.00g018860.m01" "26557-Ap.00g055490.m01" "26557-Ap.00g057600.m01"
## [151] "LIQRPFAP_2" "LIQRPFAP_2.m01" "26557-Ap.00g058170.m01"
## [154] "26557-Ap.00g023560.m01" "26557-Ap.00g066240.m01" "26557-Ap.00g068680.m01"
## [157] "IFWFIYFP" "IFWFIYFP.m01"
# check for any invalid bases in the CDS sequences
invalid_bases <- grepl("[^ATCG]", as.character(cds_seqs))
invalid_seqs <- cds_seqs[invalid_bases]
invalid_seqs
## DNAStringSet object of length 8:
## width seq names
## [1] 633 ATGCAGAATCAACCTTCCGGTTC...TCTTAATTGATTTGATGCTATAA 26557-Ap.00g02881...
## [2] 1416 ATGTCCACGCTCGACGCGCTCTT...AGTTTGGGATCGTTTATGGCTAA 26557-Ap.00g08106...
## [3] 165 ATGGGTTCGTATTCTTCCAAATA...CTACCAAAGGCACTGATTTCTAG 26557-Ap.00g03087...
## [4] 1176 ATGACCGAAACCCCCCATGGCAG...AAGCAATGTTCTGGAAATATTAG 26557-Ap.00g08395...
## [5] 861 ATGGTTGATCCTGTTCCCAATGT...GAGATCTAATAANCAAAATATAG 26557-Ap.00g04095...
## [6] 957 ATGGATGTTCCCGTTGCACAGCA...GTCGNGATGCTGAGAGCCAATAG 26557-Ap.00g00932...
## [7] 915 ATGAACACCAAACGTGTCACAGA...AACGACCGTCCATGGACGGTTAA 26557-Ap.00g05106...
## [8] 2004 ATGACGCAGCGACCAGACGCTCT...CCGTAAAATATCTGTTGAGTTGA 26557-Ap.00g01398...
# remove seqs with invalid bases
cds_seqs <- cds_seqs[!invalid_bases]
# Translate to protein sequences
protein_seqs <- translate(cds_seqs)
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[259]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[357]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[382]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[887]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[893]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1456]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1472]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1501]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1505]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1506]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1558]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1559]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1562]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1563]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1573]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1574]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1590]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1628]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1674]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1742]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1783]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[1940]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2019]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2020]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2098]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2124]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2125]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2142]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2262]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2271]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2272]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2273]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2285]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2398]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2399]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2451]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2452]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2453]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2454]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2455]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2456]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2460]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2461]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2462]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2463]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2464]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2465]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2509]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2525]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2536]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2537]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2538]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2600]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2601]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2603]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2685]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2687]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2699]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2700]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2711]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2718]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2728]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2848]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2935]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2936]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2967]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[2976]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3044]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3092]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3119]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3161]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3162]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3172]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3183]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3184]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3186]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3187]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3188]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3189]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3190]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3191]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3200]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3201]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3229]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3230]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3246]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3251]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3315]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3316]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3329]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3477]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3478]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3480]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3610]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3635]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3657]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3658]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3668]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3669]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3744]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3829]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3833]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3838]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3839]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[3932]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4001]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4058]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4095]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4269]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4270]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4325]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4382]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4383]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4667]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[4813]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5141]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5407]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5707]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5710]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5718]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5719]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5720]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5721]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5733]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5734]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5765]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5766]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5803]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5804]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5806]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5848]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5855]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5856]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5859]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5860]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5861]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5862]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5912]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5913]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[5914]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6033]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6058]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6156]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6320]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6381]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6382]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[6951]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[7106]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[7317]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[7495]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[7496]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[7553]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[8366]]': last base was ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[8531]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[8775]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[8800]]': last 2 bases were ignored
## Warning in .Call2("DNAStringSet_translate", x, skip_code,
## dna_codes[codon_alphabet], : in 'x[[8801]]': last 2 bases were ignored
head(names(protein_seqs))
## [1] "26557-Ap.00g000010.m01" "26557-Ap.00g000020.m01" "26557-Ap.00g000030.m01"
## [4] "26557-Ap.00g000050.m01" "26557-Ap.00g000060.m01" "26557-Ap.00g000100.m01"
# fix names - remove "26557-" and ".m*"
names(protein_seqs) <- sub("^26557-", "", names(protein_seqs))
names(protein_seqs) <- sub("\\.m.*$", "", names(protein_seqs))
head(names(protein_seqs))
## [1] "Ap.00g000010" "Ap.00g000020" "Ap.00g000030" "Ap.00g000050" "Ap.00g000060"
## [6] "Ap.00g000100"
# Write to a FASTA file
writeXStringSet(protein_seqs, paste0(dir,"/aphal_proteins.fasta"))
cat("Protein sequences saved to aphal_proteins.fasta\n")
## Protein sequences saved to aphal_proteins.fasta